# Configuration file for the Honkai Impact 3rd official website crawler

# Target website URLs.
TARGET_URLS = [
    "https://bh3.mihoyo.com/main",
    "https://bh3.mihoyo.com/valkyries",
]

# Crawler settings.
CRAWLER_SETTINGS = {
    # Adjacent literals are concatenated into one User-Agent string.
    "user_agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    # NOTE(review): unit is not stated in this file; presumably seconds -- confirm.
    "timeout": 30,
    "retry_times": 3,
    # Interval between requests, in seconds.
    "delay_between_requests": 2,
    # Maximum number of images per page.
    "max_images_per_page": 100,
    # Minimum image size, in bytes.
    "image_min_size": 1024,
    "supported_formats": [
        ".jpg",
        ".jpeg",
        ".png",
        ".gif",
        ".webp",
        ".svg",
    ],
    # Whether to run the browser in headless mode.
    "headless": True,
    # Browser window size.
    "window_size": (1920, 1080),
}

# Storage settings.
STORAGE_SETTINGS = {
    "base_dir": "crawled_data",
    "images_dir": "images",
    "data_dir": "data",
    "logs_dir": "logs",
    "create_subdirs_by_domain": True,
    "save_metadata": True,
    # Either "json" or "csv".
    "metadata_format": "json",
}

# Image download settings.
IMAGE_SETTINGS = {
    "download_images": True,
    # 10 MB, written as 10 * 1024 * 1024.
    "max_file_size": 10 * 1024**2,
    "quality_check": True,
    "resize_large_images": True,
    "max_width": 2048,
    "max_height": 2048,
}

# Selenium settings.
# NOTE(review): the wait/timeout units are not stated in this file;
# presumably seconds -- confirm against the code that applies them.
SELENIUM_SETTINGS = {
    "implicit_wait": 10,
    "page_load_timeout": 30,
    "script_timeout": 30,
    "enable_images": True,
    "enable_javascript": True,
}

# Logging settings.
LOGGING_SETTINGS = {
    "level": "INFO",
    # %-style template: timestamp, logger name, level name, message.
    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    "file_handler": True,
    "console_handler": True,
}